"""
Phase 5: Absorptive Capacity
==============================
Does the effect of demographic capital depend on recipient characteristics?
Triple interactions: predicted_demo_inflows × institutions / financial depth / human capital.
"""

import pandas as pd
import numpy as np
from pathlib import Path
import sys
import warnings
warnings.filterwarnings('ignore')

# Parent of the directory that contains this script.
PROJECT_DIR = Path(__file__).resolve().parent.parent
# Directory that contains PROJECT_DIR.
ROOT_DIR = PROJECT_DIR.parent
# Put <ROOT_DIR>/multilateral/src at the front of sys.path so that the
# `model` module (which provides PanelGLS) can be imported below.
sys.path.insert(0, str(ROOT_DIR / "multilateral" / "src"))
from model import PanelGLS

# Directory the input panel is read from, and directory result tables are written to.
DATA = PROJECT_DIR / "data" / "processed"
OUT_TABLES = PROJECT_DIR / "output" / "tables"
# Runs at import time: create the output directory (and parents) if it is absent.
OUT_TABLES.mkdir(parents=True, exist_ok=True)


def stars(p):
    """Return the significance marker for the p-value *p*.

    '***' when p < 0.01, '**' when p < 0.05, '*' when p < 0.1, and an
    empty string otherwise (including when no comparison holds, e.g. NaN).
    """
    for cutoff, marker in ((0.01, '***'), (0.05, '**'), (0.1, '*')):
        if p < cutoff:
            return marker
    return ''


def run_model(df, y_var, x_vars, label):
    """Fit a PanelGLS regression of *y_var* on *x_vars* and summarise the fit.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel holding the outcome, the regressors and the 'iso3' / 'year'
        columns that are passed to ``PanelGLS.fit``.
    y_var : str
        Name of the outcome column.
    x_vars : list of str
        Requested regressors. Names that are not columns of *df* are dropped
        and reported on stdout; duplicate names are used once.
    label : str
        Model label used in console output and stored under 'model'.

    Returns
    -------
    dict or None
        One result row with 'model', 'y_var', 'n_obs', 'n_countries',
        'r_squared' and, per regressor, '<name>_coef', '<name>_se' and
        '<name>_p'. ``None`` when the model is skipped: a required column is
        missing, no requested regressor exists, or fewer than 50 complete
        rows remain after dropping missing values.
    """
    absent = [c for c in (y_var, 'iso3', 'year') if c not in df.columns]
    if absent:
        print(f"  {label}: missing required column(s) {absent}, skipping")
        return None

    # dict.fromkeys de-duplicates while keeping order, so a repeated name
    # cannot put the same column into the design matrix twice.
    requested = list(dict.fromkeys(x_vars))
    actual_x = [v for v in requested if v in df.columns]
    dropped = [v for v in requested if v not in df.columns]
    if dropped:
        # Make silently omitted regressors visible: the fitted model is not
        # the one the caller asked for.
        print(f"  {label}: regressors not in panel, dropped: {', '.join(dropped)}")
    if not actual_x:
        print(f"  {label}: no usable regressors, skipping")
        return None

    keep = list(dict.fromkeys([y_var, *actual_x, 'iso3', 'year']))
    sub = df[keep].dropna()
    if len(sub) < 50:
        print(f"  {label}: insufficient obs ({len(sub)}), skipping")
        return None

    gls = PanelGLS()
    gls.fit(sub[y_var].values, sub[actual_x].values,
            sub['iso3'].values, sub['year'].values)

    print(f"\n  {label} (N={gls.n_obs}, R²={gls.r_squared:.4f})")
    row = {'model': label, 'y_var': y_var, 'n_obs': gls.n_obs,
           'n_countries': gls.n_countries, 'r_squared': gls.r_squared}
    # beta / se / pvalues are indexed in the same order as the columns of
    # the design matrix passed to fit(), i.e. the order of actual_x.
    for i, name in enumerate(actual_x):
        s = stars(gls.pvalues[i])
        print(f"    {name:40s} {gls.beta[i]:10.5f} ({gls.se[i]:.5f}) {s}")
        row[f'{name}_coef'] = gls.beta[i]
        row[f'{name}_se'] = gls.se[i]
        row[f'{name}_p'] = gls.pvalues[i]
    return row


def _select_flow_var(df):
    """Return the first candidate flow column with more than 200 non-missing values, else 'Z_1'."""
    for candidate in ('log_predicted_demo_inflows', 'log_predicted_total_inflows',
                      'log_total_portfolio_inflows'):
        if candidate in df.columns and df[candidate].notna().sum() > 200:
            return candidate

    # Fall back to demographic index Z_1 as proxy for demographic capital pressure
    print("WARNING: No flow instrument available. Using Z_1 as demographic proxy.")
    if 'Z_1' not in df.columns:
        raise KeyError("No usable flow variable: none of the candidate flow "
                       "columns has more than 200 observations and the "
                       "fallback column 'Z_1' is not in the panel")
    return 'Z_1'


def _add_interaction_terms(df, flow_var, moderators):
    """Add flow × moderator and flow × income-quartile columns to *df* in place."""
    for mod_var in moderators:
        if mod_var in df.columns:
            df[f'{flow_var}_x_{mod_var}'] = df[flow_var] * df[mod_var]
        else:
            print(f"  Moderator '{mod_var}' not in panel")

    if 'gdp_pc_ppp' not in df.columns:
        return
    df['income_quartile'] = pd.qcut(df['gdp_pc_ppp'].dropna(), 4,
                                    labels=['Q1', 'Q2', 'Q3', 'Q4'])
    quartile_known = df['income_quartile'].notna()
    for q in ['Q1', 'Q2', 'Q3', 'Q4']:
        # Keep the dummy missing where income is missing. A plain
        # `== q` comparison yields 0 for those rows, which would silently
        # put them into the comparison group of every quartile.
        df[f'income_{q}'] = ((df['income_quartile'] == q)
                             .astype(float)
                             .where(quartile_known))
        df[f'{flow_var}_x_income_{q}'] = df[flow_var] * df[f'income_{q}']


def _write_markdown(path, flow_var, outcomes, rows):
    """Write the per-outcome markdown summary of the baseline/interaction models in *rows*."""
    # Explicit UTF-8: the table contains 'β', '²' and '×', which the
    # platform default encoding cannot always represent.
    with open(path, 'w', encoding='utf-8') as f:
        f.write("# Table 5: Absorptive Capacity\n\n")
        f.write(f"*Flow variable: {flow_var}*\n\n")

        # Group by outcome
        for y_var in outcomes:
            y_label = 'Capital Deepening' if 'kl' in y_var else 'Investment/GDP'
            subset = [r for r in rows if r.get('y_var') == y_var]
            if not subset:
                continue
            f.write(f"\n## {y_label}\n\n")
            f.write("| Model | N | R² | Flow β | Interaction β | Int. p |\n")
            f.write("|-------|---|-----|--------|--------------|--------|\n")
            for r in subset:
                fb = r.get(f'{flow_var}_coef')
                fb_str = f"{fb:.4f}" if fb is not None else ''
                # Report the first interaction term of the model. For the
                # income-quartile model this is the first non-reference
                # quartile; the full set of coefficients is in the CSV.
                int_coef = ''
                int_p = ''
                for k in r:
                    if '_x_' in k and k.endswith('_coef'):
                        int_coef = f"{r[k]:.4f}"
                        int_p = f"{r[k.replace('_coef', '_p')]:.4f}"
                        break
                f.write(f"| {r['model']} | {r['n_obs']} | {r['r_squared']:.4f} "
                        f"| {fb_str} | {int_coef} | {int_p} |\n")
            f.write("\n")

        f.write("*p<0.1, **p<0.05, ***p<0.01\n")


def main():
    """Run the Phase 5 absorptive-capacity regressions and write the result tables.

    Reads ``deepening_panel.csv`` from DATA, picks a flow variable, builds
    flow × moderator and flow × income-quartile interaction terms, and runs
    (via ``run_model``) baseline, interaction, portfolio-vs-FDI composition
    and "productive investment" models. All fitted models are written to
    ``absorptive_capacity_results.csv``; the baseline and interaction models
    are additionally summarised in ``absorptive_capacity.md`` (both in
    OUT_TABLES).

    Raises
    ------
    KeyError
        If no candidate flow column is usable and the 'Z_1' fallback column
        is not in the panel.
    """
    print("=" * 70)
    print("PHASE 5: ABSORPTIVE CAPACITY")
    print("=" * 70)

    df = pd.read_csv(DATA / "deepening_panel.csv")
    print(f"Panel: {len(df)} obs, {df['iso3'].nunique()} countries")

    # Determine flow variable to use
    flow_var = _select_flow_var(df)
    print(f"Using flow variable: {flow_var} ({df[flow_var].notna().sum()} obs)")

    # Construct interaction terms
    interaction_vars = {
        'gross_liab_gdp': 'Financial Depth',
        'rule_of_law': 'Institutions (Rule of Law)',
        'hc': 'Human Capital (PWT)',
        'human_capital': 'Human Capital (Panel)',
    }
    _add_interaction_terms(df, flow_var, interaction_vars)

    results = []      # every fitted model -> CSV
    table_rows = []   # baseline + interaction models only -> markdown table
    controls = ['fiscal_bal_gdp', 'nfa_gdp_lag', 'log_rel_opw', 'kaopen']
    outcomes = ['delta_log_kl', 'gross_fixed_investment_gdp']

    def record(row, in_table=False):
        # run_model returns None for skipped models.
        if row is None:
            return
        results.append(row)
        if in_table:
            table_rows.append(row)

    for y_var in outcomes:
        if y_var not in df.columns:
            continue
        y_label = 'K/L Growth' if 'kl' in y_var else 'Investment/GDP'

        print(f"\n{'='*50}")
        print(f"Outcome: {y_label}")
        print(f"{'='*50}")

        # Baseline: flow → outcome
        record(run_model(df, y_var, [flow_var] + controls,
                         f'Baseline: {y_label}'),
               in_table=True)

        # Interaction models
        for mod_var, mod_label in interaction_vars.items():
            int_var = f'{flow_var}_x_{mod_var}'
            if int_var not in df.columns:
                continue
            record(run_model(df, y_var,
                             [flow_var, mod_var, int_var] + controls,
                             f'{y_label} × {mod_label}'),
                   in_table=True)

        # Income quartile interactions. Q1 is the reference category: the
        # flow main effect plus all four flow × quartile terms would be
        # perfectly collinear (the four terms sum to the flow variable), so
        # only Q2-Q4 enter and their coefficients are differences from Q1.
        if 'income_Q1' in df.columns:
            q_vars = [f'{flow_var}_x_income_{q}' for q in ['Q2', 'Q3', 'Q4']]
            q_vars_avail = [v for v in q_vars if v in df.columns]
            record(run_model(df, y_var,
                             [flow_var] + q_vars_avail + controls,
                             f'{y_label} × Income Quartile'),
                   in_table=True)

    # ── Composition test: Portfolio vs. FDI ──
    print(f"\n{'='*50}")
    print("Composition: Portfolio vs. FDI")
    print(f"{'='*50}")

    for flow_type, flow_label in [('log_total_portfolio_inflows', 'Portfolio'),
                                  ('log_total_fdi_inflows', 'FDI')]:
        if flow_type not in df.columns:
            continue
        for y_var in outcomes:
            if y_var not in df.columns:
                continue
            y_label = 'K/L' if 'kl' in y_var else 'Invest'
            record(run_model(df, y_var, [flow_type] + controls,
                             f'{flow_label} → {y_label}'))

    # ── Asset price vs. productive investment test ──
    print(f"\n{'='*50}")
    print("Asset Price vs. Productive Investment")
    print(f"{'='*50}")

    # If demo inflows predict investment/GDP → productive channel
    # If no investment response → asset inflation
    for y_var, y_label in [('gross_fixed_investment_gdp', 'Fixed Investment/GDP'),
                           ('rgdp_growth', 'GDP Growth'),
                           ('delta_log_kl', 'Capital Deepening')]:
        if y_var not in df.columns:
            continue
        record(run_model(df, y_var, [flow_var] + controls,
                         f'Productive Test: {flow_var} → {y_label}'))

    # ── Save ──
    results_df = pd.DataFrame(results)
    results_df.to_csv(OUT_TABLES / "absorptive_capacity_results.csv", index=False)

    # Only baseline/interaction rows go into the markdown table. Selecting
    # them by label substring let the composition models through, because
    # their labels ('Portfolio → …', 'FDI → …') do not contain 'Composition'.
    _write_markdown(OUT_TABLES / "absorptive_capacity.md", flow_var,
                    outcomes, table_rows)

    print("\nPhase 5 complete.")


# Run the analysis only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
